*--------------------------------------------------------------------
* Project: Water treatment and E. coli in drinking water: Household responses to (invisible) water quality risks
* File Name: Water Invis_Replication
* Last updated: Akito Kamei (akamei@up.edu.mx) on 2025/18/12
*--------------------------------------------------------------------

/*--------------------------------------------------------------------------------
    0 General program setup
-------------------------------------------------------------------------------*/

	clear               all
	capture log         close _all
	set more            off
	set varabbrev       off
	set emptycells      drop
	set seed            12345
	set linesize        135	
						  
/*------------------------------------------------------------------------------
	1 Select parts of the code to run
------------------------------------------------------------------------------*/
	
	local import		0
	local deidentify	0
	local clean			0
	local tidy			0
	local construct		0
	local analyze		0
	
/*------------------------------------------------------------------------------
	2 Set file paths
------------------------------------------------------------------------------*/

	* Enter the file path to the project folder in Box for every new machine you use
	* Type 'di c(username)' to see the name of your machine
	else if c(username) == "akitokamei" {		
		global Data   "/Users/akitokamei/Library/CloudStorage/Box-Box/MICS Water project/Data/"		
		global Figure "/Users/akitokamei/Dropbox/Apps/Overleaf/MICS_Water/Figure/"
		global Table "/Users/akitokamei/Dropbox/Apps/Overleaf/MICS_Water/Table/"
	}
	
	else if c(username) == "ABC" {		
		global Data   "ABC"
		global Figure "ABC"
		global Table "ABC"

	}

clear all               
set graph on	

use "${Data}MASTER_MICS.dta", clear

/*-----------------------------
     Table 2: Desciptive statistics by the level of source water contamination
-----------------------------*/
use "${Data}MASTER_MICS.dta", clear
global Main urban windex5_1 windex5_2 windex5_3 windex5_4 windex5_5 ///
			WS1_g_11 WS1_g_21 WS1_g_31 WS1_g_32 WS1_g_51 WS1_g_91 WS1_g_96
			
mdesc $Main
local Main "Household Characteristics by Water Source Contamination Level (share)"
local LabelMain "Desc1"
local noteMain "Notes: The table presents the household characteristics across 25 countries."
					 
foreach k in Main {
* Mean
	eststo  model0: estpost summarize $`k' [aw=hhweight] if RiskSource==0
	eststo  model1: estpost summarize $`k' [aw=hhweight] if RiskSource==1
	eststo  model2: estpost summarize $`k' [aw=hhweight] if RiskSource==2

esttab model0 model1 model2 using "${Table}Descript_`k'_Risk.tex", title("``k''" \label{`Label`k''}) ///
	   cell("mean (fmt(2) label(_))") stats(N, fmt("%9.0fc") label(N) ) /// 
	   mtitles("Low risk" "Moderate risk" "High risk") nonum ///
	   substitute( ".00" "" "{l}{\footnotesize" "{p{0.96\linewidth}}{\footnotesize" ///
				   "                    &           _&           _&           _\\" "" ///
				   "Piped water" "\textbf{Primary water source} \\\hline Piped water" ///
				   "Location: In own dwelling" "\textbf{Location} \\\hline Location: In own dwelling" ///
                   "Source water test (100ml)" "\textbf{Water test results} \\\hline Source water test (100ml)" ///
				   "Poorest" "\textbf{Socioeconomic level} \\\hline Poorest" ///
				   "Basic water service" "\textbf{Water source category} \\\hline Basic water service" ///
				   "Any water treatment for primary" "\textbf{Primary water source} \\\hline Any water treatment" ///
				   "25,518" "25,518 (42.8\%)" "21,844" "21,844 (36.6\%)" "12,271" "12,271 (20.6\%)" ///  
				   "-0 " "0" ///
				   "Treat:"  "~~~" "Location:"  "~~~" ///
				   ) ///
	   label  note("`note`k''")  ///
	   replace 
	   }
eststo clear

/*-----------------------------
     Figure 1 (Water treament)
-----------------------------*/
use "${Data}MASTER_MICS.dta", clear

collapse WQ15_g_1 WQ15_g_2 WQ15_g_3 WQ15_g_98 WQ15_g_99    WQ15_g_0 [aw=hhweight], by(Country)
expand 2, gen(Total)
replace Country="Total" if Total==1
gen     sortvar=WQ15_g_0
replace sortvar=WQ15_g_0+100 if Total==1

graph bar WQ15_g_1 WQ15_g_2 WQ15_g_3 WQ15_g_98 WQ15_g_99 WQ15_g_0, ///
      over(Country, label(angle(90)) sort(sortvar)) stack percent ///
      blabel(bar, position(center) size(small) color(black) gap(1) format(%9.0f)) ///
      ytitle("Percentage of Households (%)") ///
      bar(6, color(black*0.1)) ///
      legend(order(1 "Boil" ///
                   2 "Chlorine/Aquatabs/PUR" ///
                   3 "Strain/Settle"  ///
                   4 "Other" 5 "Do not know" 6 "Nothing") ///
      position(12) ring(1) col(3) region(lcolor(none))) ///
      xsize(14) ysize(8) ///
	  graphregion(margin(0 0 0 0)) plotregion(margin(0 0 0 0))
graph export "${Figure}Water_Treat.eps", replace

/*-----------------------------
     Figure 2 
-----------------------------*/
use "${Data}MASTER_MICS.dta", clear

collapse VeryHighRiskSource ModerateHighRiskSource NoRiskSource [aw=hhweight], by(Country)
expand 2, gen(Total)
replace Country="Total" if Total==1
gen     sortvar=VeryHighRiskSource
replace sortvar=sortvar+100 if Total==1

graph bar   VeryHighRiskSource ModerateHighRiskSource NoRiskSource, ///
      over(Country , label(angle(90) labsize(normal)) sort(sortvar)) stack per ///
      blabel(bar, position(center) size(small) format(%9.0f) color(white)) ///
	  ytitle("Percentage of Households (%)") ///
	  bar(1, color(maroon)) bar(2, color(orange)) bar(3, color(navy)) ///
      legend(order(3 "Low risk (<1)" 2 "Moderate risk" "(1 to 100)" 1 "High risk (>100)")  ///
	  position(12) ring(1) col(3) size(normal) region(lcolor(none))) ///
      xsize(14) ysize(8) ///
	  graphregion(margin(0 0 0 0)) plotregion(margin(0 0 0 0)) ///
	  text(110 0 "(a)", place(nw) size(large) color(black))
graph export "${Figure}Water_Category1.eps", replace     


use "${Data}MASTER_MICS.dta", clear

foreach i in VeryHighRiskSource ModerateHighRiskSource NoRiskSource {
	gen     T_`i'_0=0
	replace T_`i'_0=1 if `i'==1 & water_treatment==0
	gen     T_`i'_1=0
	replace T_`i'_1=1 if `i'==1 & water_treatment==1
}
collapse T_VeryHighRiskSource_0 T_VeryHighRiskSource_1 T_ModerateHighRiskSource_0 T_ModerateHighRiskSource_1 NoRiskSource VeryHighRiskSource [aw=hhweight], by(Country)
expand 2, gen(Total)
replace Country="Total" if Total==1
gen     sortvar=VeryHighRiskSource
replace sortvar=sortvar+100 if Total==1

graph bar   T_VeryHighRiskSource_0 T_VeryHighRiskSource_1 T_ModerateHighRiskSource_0 T_ModerateHighRiskSource_1 NoRiskSource, ///
      over(Country , label(angle(90) labsize(normal)) sort(sortvar)) stack per ///
      blabel(bar, position(center) size(small) format(%9.0f) color(white)) ///
	  ytitle("Percentage of Households (%)") ///
	  bar(1, color(maroon*0.3)) bar(2, color(maroon)) ///
				   bar(3, color(orange*0.3)) bar(4, color(orange))bar(5, color(navy)) ///
      legend(order(5 "Low risk (<1)" 4 "Moderate risk" "(1 to 100)" "Treated" ///
	               3 "Moderate risk" "(1 to 100)" "Not treated"   ///
				   2 "High risk (>100)" "Treated" 1 "High risk (>100)" "Not treated" ) ///
	  position(12) ring(1) col(5) size(small) region(lcolor(none))) ///
      xsize(14) ysize(8) ///
	  graphregion(margin(0 0 0 0)) plotregion(margin(0 0 0 0))  ///
	  text(110 0 "(b)", place(nw) size(large) color(black))
graph export "${Figure}Water_Category2.eps", replace    


/*-----------------------------
     Table 3
-----------------------------*/
use "${Data}MASTER_MICS.dta", clear

global ControlsVar i.country_cat i.urban i.WS1_g i.windex5_categ 
mdesc country_cat urban WS1_g windex5_categ water_treatment hhweight RiskSource Cluster_var

* --- Panel A (first 4) ---
eststo clear
eststo A1: reg water_treatment i.RiskSource  $ControlsVar , vce(cluster Cluster_var)
su water_treatment
estadd scalar Mean = r(mean)

eststo A2: reg water_treatment i.RiskSource  $ControlsVar  if Region==1 , vce(cluster Cluster_var)
su water_treatment if Region==1
estadd scalar Mean = r(mean)

eststo A3: reg water_treatment i.RiskSource  $ControlsVar if Region==2 , vce(cluster Cluster_var)
su water_treatment if Region==2
estadd scalar Mean = r(mean)

eststo A4: reg water_treatment i.RiskSource  $ControlsVar if Region==3 , vce(cluster Cluster_var)
su water_treatment if Region==3
estadd scalar Mean = r(mean)

* --- Panel B (last 4) ---
eststo B1: reg WQ15_g_1 i.RiskSource $ControlsVar, vce(cluster Cluster_var)
su WQ15_g_1
estadd scalar Mean = r(mean)

eststo B2: reg WQ15_g_2 i.RiskSource $ControlsVar , vce(cluster Cluster_var)
su WQ15_g_2
estadd scalar Mean = r(mean)

eststo B3: reg WQ15_g_3 i.RiskSource $ControlsVar , vce(cluster Cluster_var)
su WQ15_g_3
estadd scalar Mean = r(mean)

eststo B4: reg WQ15_g_98 i.RiskSource $ControlsVar , vce(cluster Cluster_var)
su WQ15_g_98
estadd scalar Mean = r(mean)

esttab A1  using "${Table}tmp_panelA1.tex", replace fragment ///
    drop(_cons *country_cat *WS1_g *urban *windex5_categ) ///
    label ar2 nonotes nobase nocons level(95) nonum ///
    cells("b(fmt(2)) ci(fmt(2) par) p(fmt(2))") ///
	collabels("Coef." "95\% CI" "p-value") ///
	mtitle("Overall") ///
	substitute("Moderate risk       &" "\multicolumn{5}{l}{Base: Low risk} \\ \shortstack[c]{Moderate risk}&") ///
    stats(Mean N, fmt(%9.3fc %9.0fc) labels(`"Mean"' `"N"'))

esttab A2 A3 A4 using "${Table}tmp_panelA2.tex", replace fragment ///
    drop(_cons *country_cat *WS1_g *urban *windex5_categ) ///
    label ar2 nonotes nobase nocons level(95)  nonum ///
    cells("b(fmt(2)) ci(fmt(2) par) p(fmt(2))") ///
	collabels("Coef." "95\% CI" "p-value") ///
    mtitle("Africa" "\shortstack[c]{Latin America}" "Asia") ///
	substitute("Moderate risk       &" "\multicolumn{5}{l}{Base: Low risk} \\ \shortstack[c]{Moderate risk}&") ///
    stats(Mean N, fmt(%9.3fc %9.0fc) labels(`"Mean"' `"N"'))
		  
esttab B1 B2 B3 using "${Table}tmp_panelB1.tex", replace fragment ///
    drop(_cons *country_cat *WS1_g *urban *windex5_categ) ///
    label ar2 nonotes nobase nocons level(95) nonum ///
    cells("b(fmt(2)) ci(fmt(2) par) p(fmt(2))") ///
	collabels("Coef." "95\% CI" "p-value") ///
    mtitle("Boil" "\shortstack[c]{Chlorine/Aquatabs/PUR}" "\shortstack[c]{Strain/Settle}") ///
	substitute("Moderate risk       &" "\multicolumn{5}{l}{Base: Low risk} \\ \shortstack[c]{Moderate risk}&") ///
    stats(Mean N, fmt(%9.3fc %9.0fc) labels(`"Mean"' `"N"'))
		  
************************************************************
* 3) Write a single LaTeX wrapper that stacks Panel A over Panel B
************************************************************
file close _all
file open fh using "${Table}Est_Treat_Risk_AF_LATAM_AS_.tex", write replace
file write fh "\begin{table}[!ht]\centering" _n
file write fh "\begin{adjustwidth}{-2.25in}{0in}" _n
file write fh "\caption{Water Source E. Coli Contamination and the Probability of Household Water Treatment Decision, Overall (N=59,633) and by Region}\label{ETRSource}" _n
file write fh "\begin{tabular}{lccc|ccc|cccc}" _n
file write fh "\hline \hline" _n
file write fh "\multicolumn{8}{l}{\textbf{Panel A: Dependent var — Water treated (yes = 1, no = 0)}}\\[2pt]" _n
file write fh "& \multicolumn{3}{c}{(1)} \\" _n
file write fh "\input{Table/tmp_panelA1.tex}\\" _n
file write fh "\multicolumn{8}{l}{\textbf{Panel B: Dependent var — Water treated (yes = 1, no = 0) by region}}\\[2pt]" _n
file write fh "& \multicolumn{3}{c}{(2)}& \multicolumn{3}{c}{(3)}& \multicolumn{3}{c}{(4)} \\" _n
file write fh "\input{Table/tmp_panelA2.tex}\\" _n
file write fh "\multicolumn{9}{l}{\textbf{Panel C: Dependent var — Boiling, Chlorine, and Strain/Settle (yes = 1, no = 0)}}\\[2pt]" _n
file write fh "& \multicolumn{3}{c}{(5)}& \multicolumn{3}{c}{(6)}& \multicolumn{3}{c}{(7)} \\" _n
file write fh "\input{Table/tmp_panelB1.tex}\\ \hline\hline" _n
file write fh "\multicolumn{11}{p{0.95\linewidth}}{All regressions include country fixed effects, water source type, urban/rural status, and household socioeconomic status based on an asset-based wealth index. Risk categories in the regression are defined as follows: $<$ 1 CFU/100 mL (low risk), 1–100 (moderate risk), and $>$ 100 (high risk). Robust standard errors are clustered at the primary sampling unit (PSU) level.}" _n
file write fh "\end{tabular}" _n
file write fh "\end{adjustwidth}" _n
file write fh "\end{table}" _n
file close fh

/*-----------------------------
     Table 3 (Robustness)
-----------------------------*/
use "${Data}MASTER_MICS.dta", clear

global ControlsVar i.country_cat i.urban i.WS1_g i.windex5_categ 
mdesc country_cat urban WS1_g windex5_categ water_treatment hhweight RiskSource Cluster_var

eststo clear

* (1) LPM (linear probability model)
eststo R1: reg water_treatment i.RiskSource $ControlsVar, vce(cluster Cluster_var)
quietly su water_treatment
estadd scalar Mean = r(mean)

* (2) Logit -> AME
quietly logit water_treatment i.RiskSource $ControlsVar, vce(cluster Cluster_var)
quietly margins, dydx(i.RiskSource) post
eststo R2
quietly su water_treatment
estadd scalar Mean = r(mean)

* (3) GEE -> AME
xtset Cluster_var
quietly xtgee water_treatment i.RiskSource $ControlsVar,  family(binomial) link(logit) corr(exchangeable) vce(robust)
quietly margins, dydx(i.RiskSource) post
eststo R3
quietly su water_treatment
estadd scalar Mean = r(mean)

* (4) GLMM -> AME
melogit water_treatment i.RiskSource $ControlsVar || Cluster_var:, covariance(identity) vce(robust)
* compute AMEs and make them the active results
eststo R4: margins, dydx(i.RiskSource) post
quietly su water_treatment
estadd scalar Mean = r(mean)
est replay R4
ereturn list           // should show cmdname: margins
matrix list e(b)       // these are AMEs, not log-odds

* Panel A (R1 R2)
esttab R1 R2 using "${Table}tmp_panelA_robust.tex", replace fragment ///
    label nonotes nobase nocons level(95) ///
    cells("b(fmt(2)) ci(fmt(2) par) p(fmt(2))") ///
    collabels("Coef." "95\% CI" "p-value") ///
    mtitle("LPM" "Logit") ///
    drop(_cons *country_cat *WS1_g *urban *windex5_categ) ///
	substitute("Moderate risk       &" "\multicolumn{5}{l}{Base: Low risk} \\ \shortstack[c]{Moderate risk}&") ///
    stats(Mean N, fmt(%9.2fc %9.0fc) labels("Mean" "N"))

* Panel B (R3 R4)
esttab R3 R4 using "${Table}tmp_panelB_robust.tex", replace fragment ///
    label nonotes nobase nocons level(95) ///
    cells("b(fmt(2)) ci(fmt(2) par) p(fmt(2))") ///
    collabels("Coef." "95\% CI" "p-value") ///
    mtitle("GEE (logit)" "GLMM (logit)") ///
	substitute("Moderate risk       &" "\multicolumn{5}{l}{Base: Low risk} \\ \shortstack[c]{Moderate risk}&") ///
    stats(Mean N, fmt(%9.2fc %9.0fc) labels("Mean" "N"))

*===========================
* LaTeX wrapper: Panel A on top, Panel B below
*===========================
file close _all
file open fh using "${Table}Robustness_Treat_Risk_Marginal.tex", write replace
file write fh "\begin{table}[!htbp]\centering" _n
file write fh "\caption{Robustness: LPM and Marginal Effects (Logit, GEE, GLMM)}\label{tab:robust_marginal}" _n
file write fh "\footnotesize" _n
file write fh "\begin{tabular}{lcccccccc}" _n
file write fh "\hline \hline" _n
file write fh "\input{${Table}tmp_panelA_robust.tex}\\\\[2pt]" _n
file write fh "\input{${Table}tmp_panelB_robust.tex}\\ \hline\hline" _n
file write fh "\multicolumn{9}{p{0.75\linewidth}}{\footnotesize Note: Column (1) reports LPM coefficients. Columns (2)–(4) report average marginal effects (AMEs) from logit evaluated at discrete change from the base level, GEE-logit, and GLMM-logit, respectively. All specifications include country fixed effects, water source type, urban/rural status, and wealth-index controls. Robust standard errors are clustered at the primary sampling unit (PSU) level.} " _n
file write fh "\end{tabular}" _n
file write fh "\end{table}" _n
file close fh

/*-----------------------------
     Figure 3
-----------------------------*/
use "${Data}MASTER_MICS.dta", clear

collapse VeryHighRiskHome ModerateHighRiskHome NoRiskHome [aw=hhweight], by(Country)
expand 2, gen(Total)
replace Country="Total" if Total==1
gen     sortvar=VeryHighRiskHome
replace sortvar=VeryHighRiskHome+100 if Total==1

graph bar   VeryHighRiskHome ModerateHighRiskHome NoRiskHome , ///
      over(Country , label(angle(90) labsize(normal)) sort(sortvar)) stack per ///
      blabel(bar, position(center) size(small) format(%9.0f) color(white)) ///
	  ytitle("Percentage of Households (%)") ///
	  bar(1, color(maroon)) bar(2, color(orange)) bar(3, color(navy)) ///
      legend(order(3 "Low risk (<1)" 2 "Moderate risk" "(1 to 100)" 1 "High risk (>100)")  ///
      position(12) ring(1) col(6) size(normal) region(lcolor(none))) ///
      xsize(14) ysize(8) ///
	  graphregion(margin(0 0 0 0)) plotregion(margin(0 0 0 0))  ///
	  text(110 0 "(a)", place(nw) size(large) color(black))
graph export "${Figure}Water_Category_Home1.eps", replace     


use "${Data}MASTER_MICS.dta", clear
foreach i in VeryHighRiskHome ModerateHighRiskHome NoRiskHome {
	gen     T_`i'_0=0
	replace T_`i'_0=1 if `i'==1 & water_treatment==0
	gen     T_`i'_1=0
	replace T_`i'_1=1 if `i'==1 & water_treatment==1
}

collapse T_VeryHighRiskHome_0 T_VeryHighRiskHome_1 T_ModerateHighRiskHome_0 T_ModerateHighRiskHome_1 T_NoRiskHome_0 T_NoRiskHome_1 VeryHighRiskHome [aw=hhweight], by(Country)
expand 2, gen(Total)
replace Country="Total" if Total==1
gen     sortvar=VeryHighRiskHome
replace sortvar=VeryHighRiskHome+100 if Total==1

graph bar   T_VeryHighRiskHome_0 T_VeryHighRiskHome_1 T_ModerateHighRiskHome_0 T_ModerateHighRiskHome_1 T_NoRiskHome_0 T_NoRiskHome_1, ///
      over(Country , label(angle(90) labsize(normal)) sort(sortvar)) stack per ///
      blabel(bar, position(center) size(small) format(%9.0f) color(white)) ///
	  ytitle("Percentage of Households (%)") ///
	  bar(1, color(maroon*0.3)) bar(2, color(maroon)) ///
				   bar(3, color(orange*0.3)) bar(4, color(orange)) ///
				   bar(5, color(navy*0.3))   bar(6, color(navy)) ///
      legend(order(6 "Low risk (<1)" "Treated" 5 "Low risk (<1)" "Not treated"  ///
	               4 "Moderate risk" "(1 to 100)" "Treated" 3 "Moderate risk" "(1 to 100)" "Not treated"  ///
				   2 "High risk" "(>100)" "Treated" 1 "High risk" "(>100)" "Not treated" )  ///
	  position(12) ring(1) col(6) size(small) region(lcolor(none))) ///
      xsize(14) ysize(8) ///
	  graphregion(margin(0 0 0 0)) plotregion(margin(0 0 0 0))  ///
	  text(110 0 "(b)", place(nw) size(large) color(black))
graph export "${Figure}Water_Category_Home2.eps", replace  

/*-----------------------------
     Figure 4
-----------------------------*/
use "${Data}MASTER_MICS.dta", clear

keep NoRiskHome ModerateHighRiskHome VeryHighRiskHome NoRiskSource ModerateHighRiskSource VeryHighRiskSource water_treatment WQ15_g hhweight
gen ID=_n
foreach i in NoRisk ModerateHighRisk VeryHighRisk {
	rename `i'Source `i'1
	rename `i'Home `i'2
	replace `i'1=100 if `i'1==1
	replace `i'2=100 if `i'2==1
}
reshape long NoRisk ModerateHighRisk VeryHighRisk, i(ID water_treatment WQ15_g) j(Loc)
label define Locl 1 "Source" 2 "Drinking", modify
label values Loc Locl
label define WQ15_gl 0 "No treatment" 1 "Boil" 2 `" "Chlorine" "Aquatabs" "PUR" "' 3 `" "Strain" "Settle" "', modify
graph bar VeryHighRisk ModerateHighRisk NoRisk if WQ15_g != 98 & WQ15_g != 99, ///
    stack over(Loc, label(angle(45) labsize(normal))) over(WQ15_g) ///
    blabel(bar, position(inside) format(%9.0f) color(white)) ///
	ytitle("Percentage of Households (%)") ///
    bar(1, color(maroon)) bar(2, color(orange)) bar(3, color(navy)) ///
    legend(order(3 "Low risk (<1)" 2 "Moderate risk (1 to 100)" 1 "High risk (>100)") ///
           position(12) ring(1) col(3) size(small) region(lcolor(none)))
graph export "${Figure}Desc2.eps", replace






